%--------------------------------------------------------------------------
% setup data
WinsorLevelY = 0.001; WinsorLevelX = 0.005; 
WinsorizeYRet = false;
MinStocks = 500;
ExcludeMicro = true; 

load USData;
ym = round(ymd/100); clear ymd;
Return11 = NaN(N,T,'single');
for t = 11:T
    Return11(:,t) = prod(1+Return(:,t-10:t),2) - 1;
end
MV(MV==0) = NaN; 
IsMicro = false(N,T);
for t = 6:12:T
    m = MV(:,t); m(~isfinite(m)) = []; m = sort(m);
    mcsum = cumsum(m);
    ii = mcsum <= 0.03*mcsum(end);
    fii = find(ii);
    brkpt = m(fii(end));
    idx = MV(:,t) <= brkpt;
    for s = t:min(t+11,T)
        IsMicro(idx,s) = true;
    end
end
clear t x Ns BrkPts idx s ii fii brkpt;

MV = log(MV); MV(~isfinite(MV)) = NaN;
BtoM(BtoM<0) = NaN; BtoM = log(BtoM); BtoM(~isfinite(BtoM)) = NaN;

%NumEst(~isfinite(NumEst)) = 0; Anlyst = log(1+NumEst); Anlyst(~isfinite(Anlyst)) = NaN; clear NumEst;
NumEst_New(~isfinite(NumEst_New)) = 0; Anlyst = log(1+NumEst_New); Anlyst(~isfinite(Anlyst)) = NaN; clear NumEst_New;

Turn = log(1+Turn); Turn(~isfinite(Turn)) = NaN;

XVARS = NaN(N,T,7,'single');
XVARS(:,:,1) = -BtoM; %DT
XVARS(:,:,2) = Turn; clear Turn; %LS
XVARS(:,:,3) = -Anlyst; clear Anlyst; %HLS
XVARS(:,:,4) = -Hi52w; clear Hi52w; %GH
XVARS(:,:,5) = -DGW; clear DGW; %DGW
XVARS(:,:,6) = -COGStoTotalAssets; clear COGStoTotalAssets; %SS
XVARS(:,:,7) = TotVol1y; clear TotVol1y; %Z
XVARS(~isfinite(XVARS)) = NaN;

T1 = find(ym==196307); T2 = find(ym==199212); T3 = find(ym==200801); T4 = find(ym==200912);
tRangeAll = (T1:T)';
tRangeFirst = (T1:T2)';
tRangeSecond = (T2:T)';
TFirst = T1;
clear T1 T2 T3 T4;

%------------------------------------------------------------------
%Run regression with R11 and controls on the RHS

ToDisp = cell(3,1);
FMcoef = NaN(T,7); Nstocks = NaN(T,1); adjR2 = NaN(T,1);
for t = TFirst:T
    Y = Return(:,t);
    X = [Return11(:,t-2) MV(:,t-1) BtoM(:,t-1) GPtoA(:,t-1) AG(:,t-1) Return(:,t-1)];
    idx = isfinite(Y) & all(isfinite(X),2) & ~IsMicro(:,t-2);
    if sum(idx)>MinStocks

        % winsorize across all stocks
        if WinsorizeYRet
            y = winsorize(Y(idx),WinsorLevelY);
        else
            y = Y(idx);
        end
        x = winsorize(X(idx,:),WinsorLevelX);

        % standardize across all stocks
        xs = standardize(x);
        xs2 = [xs(:,1:2) xs(:,1).*xs(:,2) xs(:,3:6)]; % no constant yet
        xx = [xs2 ones(length(y),1)];

        if any(sum(isfinite(xx))==0); continue; end
        if rank(xx) == size(xx,2)
            [bb,adjR2(t)] = myols_simple(xx,y); Nstocks(t) = length(y);
            FMcoef(t,:) = bb(1:7);
        end
    end
end
for sample = 1:3
    switch sample
        case 1, tRange = tRangeAll;
        case 2, tRange = tRangeFirst;
        case 3, tRange = tRangeSecond;
    end
    m = squeeze(mean(FMcoef(tRange,:),'omitnan')); s = squeeze(std(FMcoef(tRange,:),'omitnan')); n = squeeze(sum(isfinite(FMcoef(tRange,:)))); tstat = sqrt(n).*m./s;
    ToDisp{sample} = [100*m(1);tstat(1);NaN(4,1); mean(Nstocks(tRange,:),'omitnan'); 100*mean(adjR2(tRange,:),'omitnan')];
end

%------------------------------------------------------------------
%Run regression with R11, X, R11*X, and controls on the RHS
for xvar = 1:7
    FMcoef = NaN(T,9); Nstocks = NaN(T,1); adjR2 = NaN(T,1);
    for t = TFirst:T
        Y = Return(:,t);
        if xvar == 1
            X = [Return11(:,t-2) XVARS(:,t-1,xvar) MV(:,t-1) GPtoA(:,t-1) AG(:,t-1) Return(:,t-1)];
        else
            X = [Return11(:,t-2) XVARS(:,t-1,xvar) MV(:,t-1) BtoM(:,t-1) GPtoA(:,t-1) AG(:,t-1) Return(:,t-1)];
        end
        idx = isfinite(Y) & all(isfinite(X),2) & ~IsMicro(:,t-2);
        if sum(idx)>MinStocks

            % winsorize across all stocks
            if WinsorizeYRet
                y = winsorize(Y(idx),WinsorLevelY);
            else
                y = Y(idx);
            end
            x = winsorize(X(idx,:),WinsorLevelX);

            % standardize across all stocks
            xs = standardize(x);
            if xvar==3 % convert to residual analyst
                ytemp = xs(:,2); xtemp = [ones(sum(idx),1) xs(:,3)];
                xs(:,2) = ytemp - xtemp*(xtemp\ytemp);
            end
            if xvar == 1
                xs2 = [xs(:,1:2) xs(:,1).*xs(:,2) xs(:,3) xs(:,1).*xs(:,3) xs(:,4:6)]; % no constant yet
            else
                xs2 = [xs(:,1:2) xs(:,1).*xs(:,2) xs(:,3) xs(:,1).*xs(:,3) xs(:,4:7)]; % no constant yet
            end
            xx = [xs2 ones(length(y),1)];

            if any(sum(isfinite(xx))==0); continue; end
            if rank(xx) == size(xx,2)
                [bb,adjR2(t)] = myols_simple(xx,y);
                if xvar == 1
                    FMcoef(t,1:9) = [bb(1:5); NaN; bb(6:8)]; Nstocks(t) = length(y);
                else
                    FMcoef(t,1:9) = bb(1:9); Nstocks(t) = length(y);
                end
            end
        end
    end
    for sample = 1:3
        switch sample
            case 1, tRange = tRangeAll;
            case 2, tRange = tRangeFirst;
            case 3, tRange = tRangeSecond;
        end
        m = squeeze(mean(FMcoef(tRange,:),'omitnan')); s = squeeze(std(FMcoef(tRange,:),'omitnan')); n = squeeze(sum(isfinite(FMcoef(tRange,:)))); tstat = sqrt(n).*m./s;
        D1 = []; for i = 1:3; D1 = [D1;100*m(i);tstat(i)]; end; D1 = [D1; mean(Nstocks(tRange,:),'omitnan'); 100*mean(adjR2(tRange,:),'omitnan')];
        ToDisp{sample} = [ToDisp{sample} D1];
    end
end

D = [ToDisp{1} NaN(8,1) ToDisp{2} NaN(8,1) ToDisp{3}];
writematrix(D,FileName, 'Sheet', SheetName, 'Range','b3:aa10', 'AutoFitWidth',false);
